This Python Jupyter notebook can parse watercare bills and extract the usage data. It can be used to generate plots of the usage data.

You can download this notebook from https://github.com/neon-ninja/watercare/blob/main/parse.ipynb

You can download PDFs from https://myaccount.watercare.co.nz/bills-and-payments. Set your start date filter as far back as it'll go, and download all the PDFs into a folder called pdfs. Here's a JS snippet you can put in your browser console to save you clicking them all:

var links = document.querySelectorAll('button.flex.hover\\:bg-blue-100')
console.log(`Found ${links.length} links`)
var index = 0
setInterval(function() {
    if (index >= links.length) {
        return
    }
    console.log(index)
    links[index].click()
    index++
}, 250)
In [1]:
import pandas as pd # Tabular data
pd.options.plotting.backend = "plotly"
import plotly.express as px # Plotting
from glob import glob # Finding files
import pdftotext # PDF parsing
from tqdm.auto import tqdm # Progress bars
import re # Regular expressions
from datetime import datetime
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

files = sorted(glob("pdfs/*.pdf"))
len(files)
Out[1]:
64
In [2]:
def read_PDF(filename):
    with open(filename, "rb") as f:
        pdf = pdftotext.PDF(f)
        text = ""
        for page_text in pdf:
            text += page_text.strip()
        return text

def parse_number(amount):
    if amount.endswith(" cr"):
        return -float(amount[:-3])
    return float(amount)

def parse_text(text):
    if "$500 gift card" in text:
        # This breaks the dollar_amounts regex
        text = text.replace("$ 50", "")
    dates = re.findall(r'(\d{2} [A-Z][a-z]{2} \d{4})', text)
    dollar_amounts = [parse_number(d) for d in re.findall(r'\$ ([\d\.]+(?: cr)?)', text)]
    usage = [parse_number(d) for d in re.findall(r'([\d\.]+) kL', text)]
    unit_rates = [parse_number(d) for d in re.findall(r'\$([\d\.]+)/kL', text)]
    if len(unit_rates) == 4:
        # Rate changed
        unit_rates = unit_rates[::2]

    consumption_period = re.search(r"Consumption period (\d+) days", text).group(1)
    this_reading_match = re.search(r"This reading\s+(\d{2}-\w{3}-\d{2})\s+(\d+)\s*(Estimate|Actual)?", text)
    this_reading_date = this_reading_match.group(1)
    this_reading_date = datetime.strptime(this_reading_date, "%d-%b-%y")
    this_reading_value = int(this_reading_match.group(2))
    this_reading_type = this_reading_match.group(3)

    return {
        "Invoice date": dates[0],
        "Due date": dates[1],
        "Total due ($)": dollar_amounts[0],
        "Opening balance ($)": dollar_amounts[1],
        "Payments received ($)": dollar_amounts[2],
        "Balance still owing ($)": dollar_amounts[3],
        "Water consumption ($)": dollar_amounts[4],
        "Wastewater consumption ($)": dollar_amounts[5],
        "Wastewater fixed ($)": dollar_amounts[6],
        "Balance of current charges ($)": dollar_amounts[7],
        "GST ($)": dollar_amounts[8],
        "Consumption period (days)": int(consumption_period),
        "Reading date": this_reading_date,
        "Reading value (kL)": this_reading_value,
        "Reading type": this_reading_type,
        "Water consumption (kL)": usage[0],
        "Wastewater consumption (kL)": usage[1],
        "Wastewater rate (%)": float(re.search(r"@(\d+.\d+)%", text).group(1)),
        "Water unit rate ($/kL)": unit_rates[0],
        "Wastewater unit rate ($/kL)": unit_rates[1],
    }

results = []
for f in tqdm(files):
    text = read_PDF(f)
    results.append(parse_text(text))
df = pd.DataFrame(results)
for col in ['Invoice date', 'Due date', 'Reading date']:
    df[col] = pd.to_datetime(df[col])
df.sort_values("Invoice date", inplace=True, ascending=False)
df
  0%|          | 0/64 [00:00<?, ?it/s]
Out[2]:
Invoice date Due date Total due ($) Opening balance ($) Payments received ($) Balance still owing ($) Water consumption ($) Wastewater consumption ($) Wastewater fixed ($) Balance of current charges ($) GST ($) Consumption period (days) Reading date Reading value (kL) Reading type Water consumption (kL) Wastewater consumption (kL) Wastewater rate (%) Water unit rate ($/kL) Wastewater unit rate ($/kL)
0 2024-09-06 2024-09-27 484.90 40.24 -40.24 0.00 192.78 263.24 28.88 484.90 63.25 34 2024-09-03 1200 Actual 90.00 70.65 78.5 2.142 3.726
63 2024-07-31 2024-08-21 40.24 -2.96 0.00 -2.96 8.57 11.70 22.93 43.20 5.63 27 2024-07-31 1110 Estimate 4.00 3.14 78.5 2.142 3.726
11 2024-07-05 2024-07-26 0.00 -54.22 0.00 -54.22 9.99 13.63 27.64 51.26 6.69 35 2024-07-04 1106 Actual 5.00 3.92 78.5 1.998 3.476
22 2024-05-30 2024-06-20 0.00 -86.57 0.00 -86.57 5.99 8.20 18.16 32.35 4.22 23 2024-05-30 1101 Estimate 3.00 2.36 78.5 1.998 3.476
33 2024-05-08 2024-05-29 0.00 45.73 -45.73 0.00 -49.95 -68.20 31.58 -86.57 -11.29 40 2024-05-07 1098 Actual 25.00 19.62 78.5 1.998 3.476
44 2024-03-28 2024-04-18 45.73 74.91 -74.91 0.00 11.99 16.37 17.37 45.73 5.96 22 2024-03-28 1123 Estimate 6.00 4.71 78.5 1.998 3.476
55 2024-03-07 2024-03-28 74.91 67.79 -67.79 0.00 19.98 27.29 27.64 74.91 9.77 35 2024-03-06 1117 Estimate 10.00 7.85 78.5 1.998 3.476
60 2024-01-31 2024-02-21 67.79 66.21 -66.21 0.00 17.98 24.54 25.27 67.79 8.84 32 2024-01-31 1107 Estimate 9.00 7.06 78.5 1.998 3.476
61 2024-01-03 2024-01-24 66.21 59.92 -59.92 0.00 17.98 24.54 23.69 66.21 8.64 30 2023-12-30 1098 Estimate 9.00 7.06 78.5 1.998 3.476
62 2023-11-30 2023-12-21 59.92 74.12 -74.12 0.00 15.98 21.83 22.11 59.92 7.82 28 2023-11-30 1089 Estimate 8.00 6.28 78.5 1.998 3.476
1 2023-11-03 2023-11-24 74.12 60.71 -60.71 0.00 19.98 27.29 26.85 74.12 9.67 34 2023-11-02 1081 Estimate 10.00 7.85 78.5 1.998 3.476
2 2023-09-29 2023-10-20 60.71 57.59 -57.59 0.00 15.98 21.83 22.90 60.71 7.92 29 2023-09-29 1071 Estimate 8.00 6.28 78.5 1.998 3.476
3 2023-09-01 2023-09-22 57.59 70.65 -70.65 0.00 13.99 19.12 24.48 57.59 7.51 31 2023-08-31 1063 Actual 7.00 5.50 78.5 1.998 3.476
4 2023-07-31 2023-08-21 70.65 60.53 -60.53 0.00 20.08 27.42 23.15 70.65 9.22 32 2023-07-31 1056 Estimate 11.00 8.64 78.5 1.825 3.174
5 2023-07-03 2023-07-24 60.53 74.22 -74.22 0.00 16.42 22.41 21.70 60.53 7.90 30 2023-06-29 1045 Actual 9.00 7.06 78.5 1.825 3.174
6 2023-05-30 2023-06-20 74.22 85.75 -85.75 0.00 21.90 29.90 22.42 74.22 9.68 31 2023-05-30 1036 Estimate 12.00 9.42 78.5 1.825 3.174
7 2023-05-01 2023-05-22 85.75 61.25 -61.25 0.00 27.38 37.39 20.98 85.75 11.18 29 2023-04-29 1024 Actual 15.00 11.78 78.5 1.825 3.174
8 2023-03-31 2023-04-21 61.25 37.52 -37.52 0.00 16.42 22.41 22.42 61.25 7.99 31 2023-03-31 1009 Estimate 9.00 7.06 78.5 1.825 3.174
9 2023-03-01 2023-03-22 37.52 86.47 -86.47 0.00 7.30 9.97 20.25 37.52 4.89 28 2023-02-28 1000 Actual 4.00 3.14 78.5 1.825 3.174
10 2023-01-31 2023-02-21 86.47 66.31 -66.31 0.00 25.55 34.88 26.04 86.47 11.28 36 2023-01-31 996 Estimate 14.00 10.99 78.5 1.825 3.174
12 2023-01-04 2023-01-25 66.31 63.42 -63.42 0.00 20.08 27.42 18.81 66.31 8.65 26 2022-12-26 982 Actual 11.00 8.64 78.5 1.825 3.174
13 2022-11-30 2022-12-21 63.42 71.37 -71.37 0.00 18.25 24.92 20.25 63.42 8.27 28 2022-11-30 971 Estimate 10.00 7.85 78.5 1.825 3.174
14 2022-11-03 2022-11-24 71.37 68.48 -68.48 0.00 20.08 27.42 23.87 71.37 9.31 33 2022-11-02 961 Actual 11.00 8.64 78.5 1.825 3.174
15 2022-09-30 2022-10-21 68.48 54.83 -54.83 0.00 20.08 27.42 20.98 68.48 8.93 29 2022-09-30 950 Estimate 11.00 8.64 78.5 1.825 3.174
16 2022-09-02 2022-09-23 54.83 77.79 -77.79 0.00 12.78 17.46 24.59 54.83 7.15 34 2022-09-01 939 Actual 7.00 5.50 78.5 1.825 3.174
17 2022-07-29 2022-08-19 77.79 116.49 -116.49 0.00 25.55 34.88 17.36 77.79 10.15 24 2022-07-29 932 Estimate 14.00 10.99 78.5 1.825 3.174
18 2022-07-06 2022-07-27 116.49 70.70 -70.70 0.00 39.24 53.57 23.68 116.49 15.19 35 2022-07-05 918 Actual 23.00 18.06 78.5 1.706 2.966
20 2022-05-31 2022-06-21 70.70 99.67 -99.67 0.00 22.18 30.25 18.27 70.70 9.22 27 2022-05-31 895 Estimate 13.00 10.20 78.5 1.706 2.966
19 2022-05-05 2022-05-26 99.67 58.61 -58.61 0.00 32.41 44.25 23.01 99.67 13.00 34 2022-05-04 882 Actual 19.00 14.92 78.5 1.706 2.966
21 2022-03-31 2022-04-21 58.61 80.16 -80.16 0.00 17.06 23.28 18.27 58.61 7.64 27 2022-03-31 863 Estimate 10.00 7.85 78.5 1.706 2.966
23 2022-03-07 2022-03-28 80.16 42.46 -42.46 0.00 23.88 32.60 23.68 80.16 10.46 35 2022-03-04 853 Actual 14.00 10.99 78.5 1.706 2.966
24 2022-01-28 2022-02-18 42.46 74.80 -74.80 0.00 11.94 16.31 14.21 42.46 5.54 21 2022-01-28 839 Estimate 7.00 5.50 78.5 1.706 2.966
25 2022-01-10 2022-02-01 74.80 63.35 -63.35 0.00 20.47 27.94 26.39 74.80 9.76 39 2022-01-07 832 Actual 12.00 9.42 78.5 1.706 2.966
26 2021-11-29 2021-12-20 63.35 66.73 -66.73 0.00 18.77 25.63 18.95 63.35 8.26 28 2021-11-29 820 Estimate 11.00 8.64 78.5 1.706 2.966
27 2021-11-02 2021-11-23 66.73 72.73 -72.73 0.00 18.77 25.63 22.33 66.73 8.70 33 2021-11-01 809 Actual 11.00 8.64 78.5 1.706 2.966
28 2021-09-29 2021-10-20 72.73 74.08 -74.08 0.00 22.18 30.25 20.30 72.73 9.49 30 2021-09-29 798 Estimate 13.00 10.20 78.5 1.706 2.966
30 2021-08-31 2021-09-21 74.08 67.98 -67.98 0.00 22.18 30.25 21.65 74.08 9.66 32 2021-08-30 785 Estimate 13.00 10.20 78.5 1.706 2.966
29 2021-07-29 2021-08-19 67.98 80.57 -80.57 0.00 20.72 28.27 18.99 67.98 8.87 30 2021-07-29 772 Estimate 13.00 10.20 78.5 1.594 2.772
31 2021-06-30 2021-07-21 80.57 51.64 -51.64 0.00 25.50 34.82 20.25 80.57 10.51 32 2021-06-29 759 Actual 16.00 12.56 78.5 1.594 2.772
32 2021-05-28 2021-06-18 51.64 57.32 -57.32 0.00 14.35 19.57 17.72 51.64 6.74 28 2021-05-28 743 Estimate 9.00 7.06 78.5 1.594 2.772
34 2021-05-03 2021-05-24 57.32 61.73 -61.73 0.00 15.94 21.76 19.62 57.32 7.48 31 2021-04-30 734 Actual 10.00 7.85 78.5 1.594 2.772
35 2021-03-30 2021-04-20 61.73 51.64 -51.64 0.00 17.53 23.95 20.25 61.73 8.05 32 2021-03-30 724 Estimate 11.00 8.64 78.5 1.594 2.772
36 2021-03-01 2021-03-22 51.64 56.05 -56.05 0.00 14.35 19.57 17.72 51.64 6.74 28 2021-02-26 713 Actual 9.00 7.06 78.5 1.594 2.772
37 2021-01-29 2021-02-19 56.05 61.10 -61.10 0.00 15.94 21.76 18.35 56.05 7.31 29 2021-01-29 704 Estimate 10.00 7.85 78.5 1.594 2.772
38 2021-01-05 2021-01-26 61.10 61.10 -61.10 0.00 17.53 23.95 19.62 61.10 7.97 31 2020-12-31 694 Actual 11.00 8.64 78.5 1.594 2.772
39 2020-11-30 2020-12-21 61.10 64.23 -64.23 0.00 17.53 23.95 19.62 61.10 7.97 31 2020-11-30 683 Estimate 11.00 8.64 78.5 1.594 2.772
40 2020-11-02 2020-11-23 64.23 58.58 -58.58 0.00 19.13 26.11 18.99 64.23 8.38 30 2020-10-30 672 Actual 12.00 9.42 78.5 1.594 2.772
42 2020-09-30 2020-10-21 58.58 48.51 -48.51 0.00 15.94 21.76 20.88 58.58 7.64 33 2020-09-30 660 Estimate 10.00 7.85 78.5 1.594 2.772
41 2020-08-31 2020-09-21 48.51 56.69 -56.69 0.00 12.75 17.41 18.35 48.51 6.33 29 2020-08-28 650 Actual 8.00 6.28 78.5 1.594 2.772
43 2020-07-30 2020-08-20 56.69 49.15 -49.15 0.00 15.94 21.76 18.99 56.69 7.39 30 2020-07-30 642 Estimate 10.00 7.85 78.5 1.594 2.772
45 2020-07-01 2020-07-22 49.15 62.01 -62.01 0.00 12.44 16.98 19.73 49.15 6.41 32 2020-06-30 632 Actual 8.00 6.28 78.5 1.555 2.704
46 2020-05-29 2020-06-19 62.01 63.24 -63.24 0.00 18.66 25.47 17.88 62.01 8.09 29 2020-05-29 624 Estimate 12.00 9.42 78.5 1.555 2.704
47 2020-05-01 2020-05-22 63.24 70.60 -70.60 0.00 18.66 25.47 19.11 63.24 8.25 31 2020-04-30 612 Actual 12.00 9.42 78.5 1.555 2.704
48 2020-03-30 2020-04-20 70.60 57.72 -57.72 0.00 21.77 29.72 19.11 70.60 9.21 31 2020-03-30 600 Estimate 14.00 10.99 78.5 1.555 2.704
49 2020-03-02 2020-03-23 57.72 74.28 -74.28 0.00 17.10 23.36 17.26 57.72 7.53 28 2020-02-28 586 Actual 11.00 8.64 78.5 1.555 2.704
50 2020-01-31 2020-02-21 74.28 93.28 -93.28 0.00 23.32 31.85 19.11 74.28 9.69 31 2020-01-31 575 Estimate 15.00 11.78 78.5 1.555 2.704
51 2020-01-03 2020-01-24 93.28 54.66 -54.66 0.00 31.10 42.45 19.73 93.28 12.17 32 2019-12-31 560 Actual 20.00 15.70 78.5 1.555 2.704
52 2019-12-05 2019-12-30 54.66 0.00 0.00 0.00 15.55 21.23 17.88 54.66 7.13 29 2019-11-29 540 Estimate 10.00 7.85 78.5 1.555 2.704
53 2019-11-05 2019-11-26 52.23 66.94 -66.94 0.00 14.00 19.12 19.11 52.23 6.81 31 2019-10-31 530 Actual 9.00 7.07 78.5 1.555 2.704
54 2019-10-02 2019-10-23 66.94 58.96 -58.96 0.00 20.22 27.61 19.11 66.94 8.73 31 2019-09-30 521 Estimate 13.00 10.21 78.5 1.555 2.704
56 2019-09-04 2019-09-25 58.96 73.67 -73.67 0.00 17.11 23.36 18.49 58.96 7.69 30 2019-08-30 508 Actual 11.00 8.64 78.5 1.555 2.704
57 2019-08-02 2019-08-23 73.67 79.33 -79.33 0.00 23.33 31.85 18.49 73.67 9.61 30 2019-07-31 497 Estimate 15.00 11.78 78.5 1.555 2.704
58 2019-07-04 2019-07-25 79.33 67.93 -67.93 0.00 25.81 34.98 18.54 79.33 10.35 31 2019-07-01 482 Actual 16.45 0.55 78.5 1.517 2.618
59 2019-06-05 2019-06-26 67.93 76.86 -76.86 0.00 21.24 28.77 17.92 67.93 8.86 30 2019-05-31 465 Estimate 14.00 10.99 78.5 1.517 2.618
In [3]:
df.describe()
Out[3]:
Invoice date Due date Total due ($) Opening balance ($) Payments received ($) Balance still owing ($) Water consumption ($) Wastewater consumption ($) Wastewater fixed ($) Balance of current charges ($) GST ($) Consumption period (days) Reading date Reading value (kL) Water consumption (kL) Wastewater consumption (kL) Wastewater rate (%) Water unit rate ($/kL) Wastewater unit rate ($/kL)
count 64 64 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 64 64.000000 64.000000 64.000000 64.0 64.000000 64.000000
mean 2022-01-15 21:22:30 2022-02-05 23:15:00 69.244219 59.806406 -62.052500 -2.246094 20.400781 27.845312 20.998125 69.244219 9.032187 30.500000 2022-01-14 10:07:30 835.015625 12.475781 9.601094 78.5 1.737016 3.020250
min 2019-06-05 00:00:00 2019-06-26 00:00:00 0.000000 -86.570000 -116.490000 -86.570000 -49.950000 -68.200000 14.210000 -86.570000 -11.290000 21.000000 2019-05-31 00:00:00 465.000000 3.000000 0.550000 78.5 1.517000 2.618000
25% 2020-09-22 12:00:00 2020-10-13 12:00:00 57.162500 57.162500 -73.772500 0.000000 15.940000 21.760000 18.527500 57.162500 7.457500 29.000000 2020-09-21 18:00:00 657.500000 9.000000 7.060000 78.5 1.594000 2.772000
50% 2022-01-19 00:00:00 2022-02-09 12:00:00 63.385000 63.385000 -63.385000 0.000000 18.455000 25.195000 19.990000 63.385000 8.265000 31.000000 2022-01-17 12:00:00 835.500000 11.000000 8.640000 78.5 1.706000 2.966000
75% 2023-05-08 06:00:00 2023-05-29 06:00:00 73.772500 73.772500 -57.162500 0.000000 21.802500 29.765000 22.950000 73.772500 9.622500 32.000000 2023-05-06 18:00:00 1027.000000 13.000000 10.200000 78.5 1.825000 3.174000
max 2024-09-06 00:00:00 2024-09-27 00:00:00 484.900000 116.490000 0.000000 0.000000 192.780000 263.240000 31.580000 484.900000 63.250000 40.000000 2024-09-03 00:00:00 1200.000000 90.000000 70.650000 78.5 2.142000 3.726000
std NaN NaN 56.223242 29.704873 20.942004 12.674220 24.183486 33.020630 3.312389 57.812202 7.540910 3.427248 NaN 207.889355 10.618429 8.404957 0.0 0.175876 0.307204
In [4]:
df.groupby(df["Invoice date"].dt.year)["Balance of current charges ($)"].sum()
Out[4]:
Invoice date
2019    453.72
2020    759.39
2021    764.92
2022    878.78
2023    795.04
2024    779.78
Name: Balance of current charges ($), dtype: float64
In [5]:
px.scatter(df, x="Reading date", y="Reading value (kL)", color="Reading type", title="Reading values (kL)", trendline="ols")
In [6]:
px.bar(df, x="Reading date", y="Water consumption (kL)")
In [7]:
df["Average daily usage (L)"] = df["Water consumption (kL)"] * 1000 / df["Consumption period (days)"]
px.bar(df, x="Reading date", y="Average daily usage (L)")
In [8]:
px.bar(df, x="Reading date", y="Balance of current charges ($)")
In [9]:
px.line(df, x="Reading date", y=["Water unit rate ($/kL)", "Wastewater unit rate ($/kL)"], title="Unit rates ($/kL)")